# Copyright (C) 2015 Lukas Rist
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import io
import os
import re
import tarfile


class ExploitDB(object):
    """Collect URL paths ("dorks") from a directory of exploit write-ups.

    After :meth:`get_dorks` has run, ``vuln_list`` holds the de-duplicated
    paths found on ordinary lines and ``rfi_list`` holds the paths found on
    lines that mention "rfi" (case-insensitive).
    """

    # Matches ``http(s)://host/path/file.ext?key=value``; group 2 (index 1 of
    # each findall tuple) is the ``/path/file.ext`` part that is collected.
    # Kept as a raw string so ``\.``, ``\s`` and ``\?`` reach the regex
    # engine untouched, and compiled once instead of on every call.
    _PATH_REGEX = re.compile(
        r'(http[s]?://[a-z]+?\.?[a-z]*)(/[^\s]+?\.[a-z]+)(\?)([^\s]+=[^\s]+)',
        re.IGNORECASE,
    )

    def __init__(self, path='modules/handlers/emulators/dork_list/archive/platforms/php/webapps'):
        """Remember the exploit directory and start with empty result lists.

        :param path: '/'-separated directory containing the exploit files.
        """
        self.path = path
        self.vuln_list = []
        self.rfi_list = []

    def _extract_archive(self):
        """Unpack the bundled tarball if the archive root is not on disk yet.

        The archive root is ``self.path`` with its last three components
        stripped. Nothing happens when that directory already exists.
        """
        if os.path.exists(self.path.rsplit('/', 3)[0]):
            return
        if self.path.split('/', 1)[0] == 'archive':
            # Path is relative to the current directory (stand-alone run).
            archive_name, target_dir = "archive.tar.gz", "."
        else:
            archive_name = "modules/handlers/emulators/dork_list/archive.tar.gz"
            target_dir = "modules/handlers/emulators/dork_list/"
        # The context manager closes the tarball even if extraction fails.
        # NOTE(review): extractall() trusts the member names in the tarball;
        # confirm the archive always comes from a trusted source.
        with tarfile.open(archive_name) as tar:
            tar.extractall(target_dir)

    def _get_exploits(self):
        """Return the names of all entries in the exploit directory."""
        return os.listdir(self.path)

    @classmethod
    def _extract_paths(cls, line):
        """Return the path component of every parameterised URL in ``line``.

        :param line: a single line of text.
        :return: list of ``/path/file.ext`` strings, in order of appearance;
                 empty when the line contains no matching URL.
        """
        # Group 2 always starts with a literal '/', so it is never empty.
        return [groups[1] for groups in cls._PATH_REGEX.findall(line)]

    def _select_line(self, line):
        """Store the paths found in ``line`` in the matching result list.

        Lines without a '/' are ignored. Lines mentioning "rfi" (any case)
        feed ``rfi_list``; all other lines feed ``vuln_list``.
        """
        if "/" not in line:
            return
        target = self.rfi_list if "rfi" in line.lower() else self.vuln_list
        target.extend(self._extract_paths(line))

    def _process_exploits(self):
        """Scan every file in the exploit directory line by line."""
        for exploit in self._get_exploits():
            exploit_path = os.path.join(self.path, exploit)
            if not os.path.isfile(exploit_path):
                # Sub-directories and other non-files cannot be read as text.
                continue
            # Exploit write-ups come in assorted encodings; replacing
            # undecodable bytes keeps one odd file from aborting the scan.
            with io.open(exploit_path, 'r', encoding='utf-8',
                         errors='replace') as exploit_fp:
                for line in exploit_fp:
                    self._select_line(line)

    @classmethod
    def _get_archive(cls):
        """Placeholder for fetching the archive; currently does nothing."""
        # http://www.exploit-db.com/archive.tar.bz2
        pass

    def get_dorks(self):
        """Populate ``vuln_list`` and ``rfi_list`` with de-duplicated paths.

        Ensures the archive is extracted, scans all exploit files and then
        removes duplicates from both lists. The order of the resulting
        lists is unspecified. Returns ``None``; read the two attributes.
        """
        self._get_archive()
        self._extract_archive()
        self._process_exploits()
        self.vuln_list = list(set(self.vuln_list))
        self.rfi_list = list(set(self.rfi_list))


if __name__ == '__main__':
    # Stand-alone run: collect dorks from an archive located relative to the
    # current working directory.
    ExploitDB(path='archive/platforms/php/webapps').get_dorks()
